import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Load the sensor log from the CSV file into the DataFrame `stop`, then print
# its column summary (names, non-null counts, dtypes, memory usage).
stop=pd.read_csv('plant_vase1.CSV')
stop.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 4117 entries, 0 to 4116 Data columns (total 12 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 year 4117 non-null int64 1 month 4117 non-null int64 2 day 4117 non-null int64 3 hour 4117 non-null int64 4 minute 4117 non-null int64 5 second 4117 non-null int64 6 moisture0 4117 non-null float64 7 moisture1 4117 non-null float64 8 moisture2 4117 non-null float64 9 moisture3 4117 non-null float64 10 moisture4 4117 non-null float64 11 irrgation 4117 non-null bool dtypes: bool(1), float64(5), int64(6) memory usage: 358.0 KB
# Display the dtype of every column as a Series.
stop.dtypes
year int64 month int64 day int64 hour int64 minute int64 second int64 moisture0 float64 moisture1 float64 moisture2 float64 moisture3 float64 moisture4 float64 irrgation bool dtype: object
# Print the column labels of `stop` as a plain Python list.
print(stop.columns.tolist())
['year', 'month', 'day', 'hour', 'minute', 'second', 'moisture0', 'moisture1', 'moisture2', 'moisture3', 'moisture4', 'irrgation']
# Remove the irrigation flag and the year/month columns in place; the column
# label 'irrgation' is spelled exactly as it appears in the data.
stop.drop(columns=['irrgation', 'year', 'month'], inplace=True)

# Keep the rows that repeat an earlier row (first occurrences are excluded).
duplicate_rows_stop = stop.loc[stop.duplicated()]

# Preview the first five remaining rows.
stop.head()
| | day | hour | minute | second | moisture0 | moisture1 | moisture2 | moisture3 | moisture4 |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 6 | 22 | 16 | 11 | 0.70 | 0.64 | 0.73 | 0.40 | 0.02 |
| 1 | 6 | 22 | 17 | 11 | 0.70 | 0.64 | 0.71 | 0.39 | 0.02 |
| 2 | 6 | 22 | 18 | 11 | 0.69 | 0.63 | 0.70 | 0.39 | 0.02 |
| 3 | 6 | 22 | 19 | 11 | 0.69 | 0.63 | 0.70 | 0.39 | 0.02 |
| 4 | 6 | 22 | 20 | 12 | 0.69 | 0.62 | 0.69 | 0.39 | 0.02 |
# Count the missing values in each column.
stop.isna().sum()
day 0 hour 0 minute 0 second 0 moisture0 0 moisture1 0 moisture2 0 moisture3 0 moisture4 0 dtype: int64
# Draw one regression plot of moisture4 against each remaining column.
# Each pair is (x column, seaborn palette name).
#
# The fourth plot originally repeated x="hour", which left `second` as the
# only column that was never plotted; it now uses "second". The redundant
# back-to-back plt.show() calls were dropped (a second call with no open
# figure draws nothing).
for x_column, palette_name in (
    ("day", "rocket"),
    ("hour", "icefire"),
    ("minute", "coolwarm"),
    ("second", "cubehelix"),
    ("moisture0", "YlOrBr"),
    ("moisture1", "Blues"),
    ("moisture2", "viridis"),
    ("moisture3", "rocket_r"),
):
    sns.lmplot(x=x_column, y="moisture4", hue="moisture4",
               palette=palette_name, data=stop)
    plt.show()
# Correlation of every column except the first one (`day`) with moisture3.
stop.iloc[:, 1:].corr()['moisture3']
hour -0.087577 minute -0.001073 second 0.120826 moisture0 0.913997 moisture1 0.676025 moisture2 0.862147 moisture3 1.000000 moisture4 0.035159 Name: moisture3, dtype: float64
# Pairwise correlation matrix of all columns, displayed as a styled table
# whose cell backgrounds follow the 'coolwarm' colour map.
corr = stop.corr()
corr.style.background_gradient(cmap='coolwarm')
| | day | hour | minute | second | moisture0 | moisture1 | moisture2 | moisture3 | moisture4 |
|---|---|---|---|---|---|---|---|---|---|
| day | 1.000000 | -0.216850 | -0.016147 | -0.072125 | -0.924280 | -0.611622 | -0.878232 | -0.891575 | -0.025887 |
| hour | -0.216850 | 1.000000 | -0.001180 | 0.070989 | -0.081659 | -0.034199 | -0.033951 | -0.087577 | 0.018892 |
| minute | -0.016147 | -0.001180 | 1.000000 | -0.002409 | -0.002609 | -0.013826 | -0.005886 | -0.001073 | 0.001120 |
| second | -0.072125 | 0.070989 | -0.002409 | 1.000000 | 0.093240 | 0.093260 | 0.091372 | 0.120826 | -0.003285 |
| moisture0 | -0.924280 | -0.081659 | -0.002609 | 0.093240 | 1.000000 | 0.735426 | 0.943547 | 0.913997 | 0.024469 |
| moisture1 | -0.611622 | -0.034199 | -0.013826 | 0.093260 | 0.735426 | 1.000000 | 0.783724 | 0.676025 | -0.004386 |
| moisture2 | -0.878232 | -0.033951 | -0.005886 | 0.091372 | 0.943547 | 0.783724 | 1.000000 | 0.862147 | 0.002836 |
| moisture3 | -0.891575 | -0.087577 | -0.001073 | 0.120826 | 0.913997 | 0.676025 | 0.862147 | 1.000000 | 0.035159 |
| moisture4 | -0.025887 | 0.018892 | 0.001120 | -0.003285 | 0.024469 | -0.004386 | 0.002836 | 0.035159 | 1.000000 |
# Feature matrix: every column except the prediction target moisture4.
X = stop.drop(columns='moisture4')
X.head()
| | day | hour | minute | second | moisture0 | moisture1 | moisture2 | moisture3 |
|---|---|---|---|---|---|---|---|---|
| 0 | 6 | 22 | 16 | 11 | 0.70 | 0.64 | 0.73 | 0.40 |
| 1 | 6 | 22 | 17 | 11 | 0.70 | 0.64 | 0.71 | 0.39 |
| 2 | 6 | 22 | 18 | 11 | 0.69 | 0.63 | 0.70 | 0.39 |
| 3 | 6 | 22 | 19 | 11 | 0.69 | 0.63 | 0.70 | 0.39 |
| 4 | 6 | 22 | 20 | 12 | 0.69 | 0.62 | 0.69 | 0.39 |
# Target vector: the moisture4 column as a Series.
y = stop.loc[:, 'moisture4']
y.head()
0 0.02 1 0.02 2 0.02 3 0.02 4 0.02 Name: moisture4, dtype: float64
from sklearn.preprocessing import StandardScaler
from sklearn import metrics
# Standardize every feature column. fit_transform returns a NumPy array, so
# X stops being a DataFrame from this point on.
# NOTE(review): the scaler is fitted on all rows before the train/test split
# performed later in the file, so test-set statistics influence the scaling.
# Consider fitting on the training split only and transforming the test split.
sc = StandardScaler()
X = sc.fit_transform(X)
X
array([[-2.23301477, 1.59546095, -0.7775538 , ..., -0.54656403,
1.04483935, 0.3766541 ],
[-2.23301477, 1.59546095, -0.71979354, ..., -0.54656403,
0.63193826, 0.32593835],
[-2.23301477, 1.59546095, -0.66203328, ..., -0.99669488,
0.42548771, 0.32593835],
...,
[ 1.31927341, 1.15597797, -0.89307431, ..., -1.44682573,
-1.84546831, -1.14481856],
[ 1.31927341, 1.15597797, -0.83531405, ..., -1.44682573,
-1.63901776, -1.14481856],
[ 1.31927341, 1.15597797, -0.7775538 , ..., -0.99669488,
-1.63901776, -1.19553432]])
from sklearn import metrics
# Display the (rows, columns) shape of the scaled feature array.
X.shape
(4117, 8)
from sklearn.model_selection import train_test_split
# Hold out 30% of the rows as a test set; random_state fixes the shuffle so
# the split is reproducible. The trailing expression displays the test target.
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.3,random_state=42)
y_test
2648 0.02
843 0.02
2222 0.02
2413 0.02
3911 0.02
...
3254 0.02
2459 0.02
2166 0.02
867 0.02
926 0.02
Name: moisture4, Length: 1236, dtype: float64
def predict(algorithm):
    """Fit ``algorithm`` on the training split and report test-set metrics.

    Reads the module-level ``X_train``, ``y_train``, ``X_test`` and
    ``y_test``. Prints the training and test ``score`` values, the test
    predictions, R², MAE, MSE and RMSE, then plots the distribution of the
    residuals ``y_test - preds``.

    Args:
        algorithm: An unfitted estimator exposing ``fit``, ``score`` and
            ``predict``.

    Returns:
        None. All results are printed or plotted.
    """
    model = algorithm.fit(X_train, y_train)
    print('Training Score: {}'.format(model.score(X_train, y_train)))
    # NOTE: the value printed as "Test Accuracy" comes from `score`, and in
    # the recorded runs it is identical to the r2_score printed below. The
    # label is kept unchanged so the printed output stays comparable.
    print('Test Accuracy: {}'.format(model.score(X_test, y_test)))

    preds = model.predict(X_test)
    print('Predictions are: {}'.format(preds))
    print('\n')

    r2 = metrics.r2_score(y_test, preds)
    print('r2_score is:{}'.format(r2))
    print('MAE:', metrics.mean_absolute_error(y_test, preds))

    # Compute the MSE once and reuse it for the RMSE.
    mse = metrics.mean_squared_error(y_test, preds)
    print('MSE:', mse)
    print('RMSE:', np.sqrt(mse))

    # `distplot` is deprecated and emits a FutureWarning; `histplot` with a
    # KDE overlay on a density scale is the replacement seaborn recommends.
    sns.histplot(y_test - preds, color='red', kde=True, stat='density')
from sklearn.metrics import accuracy_score as score
from sklearn.linear_model import LinearRegression
# Evaluate a default-configured LinearRegression through predict().
predict(LinearRegression())
Training Score: 0.011281276597404633 Test Accuracy: 0.002788402154185632 Predictions are: [0.02048459 0.02100679 0.02061148 ... 0.02074936 0.02131462 0.02079831] r2_score is:0.002788402154185632 MAE: 0.001477051083049631 MSE: 7.211835925786223e-06 RMSE: 0.002685486161905554
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms). warnings.warn(msg, FutureWarning)
# Fit a fresh linear model outside predict() and keep its test-set
# predictions in preds1; the trailing expression displays them.
ln_model = LinearRegression().fit(X_train, y_train)
preds1 = ln_model.predict(X_test)
preds1
array([0.02048459, 0.02100679, 0.02061148, ..., 0.02074936, 0.02131462,
0.02079831])
import plotly.express as px
# Scatter of true vs. predicted values for preds1, with a dashed diagonal
# running from the minimum to the maximum of the full target `y`.
fig = px.scatter(
    x=y_test,
    y=preds1,
    labels={'x': 'True Moisture', 'y': 'Predicted Moisture'},
)
fig.add_shape(type="line", line={'dash': 'dash'},
              x0=y.min(), y0=y.min(), x1=y.max(), y1=y.max())
fig.show()
from sklearn.ensemble import RandomForestRegressor
# Evaluate a default-configured RandomForestRegressor through predict().
predict(RandomForestRegressor())
Training Score: 0.8634559358163412 Test Accuracy: 0.07568437859218236 Predictions are: [0.0203 0.0209 0.0201 ... 0.0227 0.0211 0.0241] r2_score is:0.07568437859218236 MAE: 0.0011824433656958046 MSE: 6.684652103559864e-06 RMSE: 0.002585469416481244
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).
# Fit a fresh random forest outside predict() and keep its test-set
# predictions in preds2; the trailing expression displays them.
rf = RandomForestRegressor().fit(X_train, y_train)
preds2 = rf.predict(X_test)
preds2
array([0.0201, 0.0211, 0.0209, ..., 0.0217, 0.0223, 0.025 ])
import plotly.express as px
# Scatter of true vs. predicted values for preds2, with a dashed diagonal
# running from the minimum to the maximum of the full target `y`.
fig = px.scatter(
    x=y_test,
    y=preds2,
    labels={'x': 'True Moisture', 'y': 'Predicted Moisture'},
)
fig.add_shape(type="line", line={'dash': 'dash'},
              x0=y.min(), y0=y.min(), x1=y.max(), y1=y.max())
fig.show()
from sklearn.neighbors import KNeighborsRegressor
# Evaluate a default-configured KNeighborsRegressor through predict().
predict(KNeighborsRegressor())
Training Score: 0.18146928122792771 Test Accuracy: -0.19792981725695347 Predictions are: [0.02 0.02 0.022 ... 0.022 0.02 0.022] r2_score is:-0.19792981725695347 MAE: 0.0013576051779935275 MSE: 8.663430420711972e-06 RMSE: 0.002943370588409141
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).
# Fit a fresh k-nearest-neighbours regressor outside predict() and keep its
# test-set predictions in preds3; the trailing expression displays them.
knn = KNeighborsRegressor().fit(X_train, y_train)
preds3 = knn.predict(X_test)
preds3
array([0.02 , 0.02 , 0.022, ..., 0.022, 0.02 , 0.022])
import plotly.express as px
# Scatter of true vs. predicted values for preds3, with a dashed diagonal
# running from the minimum to the maximum of the full target `y`.
fig = px.scatter(
    x=y_test,
    y=preds3,
    labels={'x': 'True Moisture', 'y': 'Predicted Moisture'},
)
fig.add_shape(type="line", line={'dash': 'dash'},
              x0=y.min(), y0=y.min(), x1=y.max(), y1=y.max())
fig.show()
from sklearn.tree import DecisionTreeRegressor
# Evaluate a default-configured DecisionTreeRegressor through predict().
predict(DecisionTreeRegressor())
Training Score: 1.0 Test Accuracy: -0.4990903577926007 Predictions are: [0.02 0.02 0.02 ... 0.02 0.02 0.02] r2_score is:-0.4990903577926007 MAE: 0.0010841423948220119 MSE: 1.0841423948220056e-05 RMSE: 0.0032926317662654073
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).
# Fit a fresh decision tree outside predict() and keep its test-set
# predictions in preds4; the trailing expression displays them.
# This previously instantiated KNeighborsRegressor (a copy-paste slip from
# the KNN section), so preds4 merely duplicated the KNN predictions. Any
# previously recorded output for preds4 must be regenerated by re-running.
dt = DecisionTreeRegressor()
dt.fit(X_train, y_train)
preds4 = dt.predict(X_test)
preds4
array([0.02 , 0.02 , 0.022, ..., 0.022, 0.02 , 0.022])
import plotly.express as px
# Scatter of true vs. predicted values for preds4, with a dashed diagonal
# running from the minimum to the maximum of the full target `y`.
fig = px.scatter(
    x=y_test,
    y=preds4,
    labels={'x': 'True Moisture', 'y': 'Predicted Moisture'},
)
fig.add_shape(type="line", line={'dash': 'dash'},
              x0=y.min(), y0=y.min(), x1=y.max(), y1=y.max())
fig.show()
from xgboost.sklearn import XGBRegressor
# Evaluate a default-configured XGBRegressor through predict().
predict( XGBRegressor())
Training Score: 0.5397815666750336 Test Accuracy: 0.14051909973409826 Predictions are: [0.0207539 0.02072221 0.02062525 ... 0.02051513 0.02170599 0.02459675] r2_score is:0.14051909973409826 MAE: 0.0012292668977481062 MSE: 6.215767292974362e-06 RMSE: 0.0024931440578061996
C:\Users\DELL\anaconda3\lib\site-packages\seaborn\distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).
# Fit a fresh XGBoost regressor outside predict() and keep its test-set
# predictions in preds5; the trailing expression displays them.
xgb = XGBRegressor().fit(X_train, y_train)
preds5 = xgb.predict(X_test)
preds5
array([0.0207539 , 0.02072221, 0.02062525, ..., 0.02051513, 0.02170599,
0.02459675], dtype=float32)
import plotly.express as px
# Scatter of true vs. predicted values for preds5, with a dashed diagonal
# running from the minimum to the maximum of the full target `y`.
fig = px.scatter(
    x=y_test,
    y=preds5,
    labels={'x': 'True Moisture', 'y': 'Predicted Moisture'},
)
fig.add_shape(type="line", line={'dash': 'dash'},
              x0=y.min(), y0=y.min(), x1=y.max(), y1=y.max())
fig.show()